suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(MASS))
suppressPackageStartupMessages(library(xtable))
suppressPackageStartupMessages(library(lattice))
suppressPackageStartupMessages(library(latticeExtra))
suppressPackageStartupMessages(library(car))
suppressPackageStartupMessages(library(beepr))
suppressPackageStartupMessages(library(lme4))
suppressPackageStartupMessages(library(lmerTest))
suppressPackageStartupMessages(library(LMERConvenienceFunctions)) # for perSubjectTrim
suppressPackageStartupMessages(library(languageR))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(grid))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(plyr))
suppressPackageStartupMessages(library(rms)) # for varclus
suppressPackageStartupMessages(library(languageR))
source('CIfunctions.R')
# Summarise the experimental design: 30 subjects x 256 trials; 64 unique
# cells (Item x Number x Vagueness x Order x Quantity) seen 4 times each;
# 16 higher-level cells (Item x Number x Vagueness) seen 16 times each.
cat(paste('number of subjects = ',30,'\n',
'---','\n',
'number of trials per subject = ',256,'\n',
'---','\n',
'number of unique cells in the design = Item(4) * Number(2) * Vagueness(2) * Order(2) * Quantity(2) = ',4*2*2*2*2,'\n',
'number of times a subject saw a unique cell in the design = 256 / 64 = ', 256/64,'\n',
'---','\n',
'number of higher-level cells in the design (abstracting over Order and Quantity) = Item(4) * Number(2) * Vagueness(2) = ',4*2*2,'\n',
# FIX: the label states "256 / 16" but the expression previously computed
# 4*2*2 (the same value, 16, but misleading); compute what the label says.
'number of times a subject saw a higher level cell in the design = = 256 / 16 = ', 256/16, '\n',
sep=''))
## number of subjects = 30
## ---
## number of trials per subject = 256
## ---
## number of unique cells in the design = Item(4) * Number(2) * Vagueness(2) * Order(2) * Quantity(2) = 64
## number of times a subject saw a unique cell in the design = 256 / 64 = 4
## ---
## number of higher-level cells in the design (abstracting over Order and Quantity) = Item(4) * Number(2) * Vagueness(2) = 16
## number of times a subject saw a higher level cell in the design = = 256 / 16 = 16
# Demonstration table for the "ladder" of power transformations: for the
# example value x = 445, show each transformation y = x^lambda (natural log
# for lambda = 0), its conventional and power-notation formulas, and the
# back-transformation that recovers x.
x=445
lambda = c(-2,-1,-0.5, 0,0.5,1,2)
# the example value, repeated once per row of the table
x_col <- rep(paste0("x = ", x), length(lambda))
# transformed values; the rounding precision deliberately differs per row so
# that each printed value is readable at its own scale
y_col <- c(
  paste0("y = ", format(round(x^lambda[1], 6), sc=F)),
  paste0("y = ", round(x^lambda[2], 3)),
  paste0("y = ", round(x^lambda[3], 3)),
  paste0("y = ", round(log(x), 3)),
  paste0("y = ", round(x^lambda[5], 2)),
  paste0("y = ", round(x^lambda[6], 6)),
  paste0("y = ", round(x^lambda[7], 6))
)
# numeric round trip: apply each transformation, then its inverse
y_back_col <- c(
  paste0('x = ', (x^lambda[1]) ^ (1/lambda[1])),
  paste0('x = ', (x^lambda[2]) ^ (1/lambda[2])),
  paste0('x = ', (x^lambda[3]) ^ (1/lambda[3])),
  paste0('x = ', exp(log(x))),
  paste0('x = ', (x^lambda[5]) ^ (1/lambda[5])),
  paste0('x = ', (x^lambda[6]) ^ (1/lambda[6])),
  paste0('x = ', (x^lambda[7]) ^ (1/lambda[7]))
)
xd=data.frame(
  lambda = lambda,
  x = x_col,
  # conventional textbook notation for each transformation
  conventional=c(
    '$$y=\\frac{1}{x^2}$$',
    '$$y=\\frac{1}{x}$$',
    '$$y=\\frac{1}{\\sqrt{x}}$$',
    '$$y=log_e(x)$$',
    '$$y=\\sqrt{x}$$',
    '$$y=x$$',
    '$$y=x^2$$'
  ),
  # the same transformations written uniformly as powers of x
  power=c(
    '$$y=x^{-2}$$',
    '$$y=x^{-1}$$',
    '$$y=x^{-0.5}$$',
    '$$y=log_e(x)$$',
    '$$y=x^{0.5}$$',
    '$$y=x^{1}$$',
    '$$y=x^{2}$$'
  ),
  y = y_col,
  # back-transformation with the concrete lambda filled in
  back = c(
    '$$x=y^{\\frac{1}{-2}}$$',
    '$$x=y^{\\frac{1}{-1}}$$',
    '$$x=y^{\\frac{1}{-0.5}}$$',
    '$$x=exp(y)$$',
    '$$x=y^{\\frac{1}{0.5}}$$',
    '$$x=y^{\\frac{1}{1}}$$',
    '$$x=y^{\\frac{1}{2}}$$'
  ),
  # back-transformation in general form
  back_lambda=c(
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$',
    '$$x=exp(y)$$',
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$',
    '$$x=y^{1/\\lambda}$$'
  ),
  y_back = y_back_col
)
print(xtable(xd, digits=1, align=c(rep('l', times=ncol(xd)+1))), type='html', include.rownames=F, digits=1)
| lambda | x | conventional | power | y | back | back_lambda | y_back |
|---|---|---|---|---|---|---|---|
| -2.0 | x = 445 | \[y=\frac{1}{x^2}\] | \[y=x^{-2}\] | y = 0.000005 | \[x=y^{\frac{1}{-2}}\] | \[x=y^{1/\lambda}\] | x = 445 |
| -1.0 | x = 445 | \[y=\frac{1}{x}\] | \[y=x^{-1}\] | y = 0.002 | \[x=y^{\frac{1}{-1}}\] | \[x=y^{1/\lambda}\] | x = 445 |
| -0.5 | x = 445 | \[y=\frac{1}{\sqrt{x}}\] | \[y=x^{-0.5}\] | y = 0.047 | \[x=y^{\frac{1}{-0.5}}\] | \[x=y^{1/\lambda}\] | x = 445 |
| 0.0 | x = 445 | \[y=log_e(x)\] | \[y=log_e(x)\] | y = 6.098 | \[x=exp(y)\] | \[x=exp(y)\] | x = 445 |
| 0.5 | x = 445 | \[y=\sqrt{x}\] | \[y=x^{0.5}\] | y = 21.1 | \[x=y^{\frac{1}{0.5}}\] | \[x=y^{1/\lambda}\] | x = 445 |
| 1.0 | x = 445 | \[y=x\] | \[y=x^{1}\] | y = 445 | \[x=y^{\frac{1}{1}}\] | \[x=y^{1/\lambda}\] | x = 445 |
| 2.0 | x = 445 | \[y=x^2\] | \[y=x^{2}\] | y = 198025 | \[x=y^{\frac{1}{2}}\] | \[x=y^{1/\lambda}\] | x = 445 |
Load the saved data files dat.Rda and dd.Rda if they exist; otherwise gather and wrangle the raw data and create them.
# if the output file dat.Rda already exists then load it, else do data wrangling
# Cached-results guard: the wrangling below is slow, so its two products
# (dat = all 7680 trials; dd = trials with impossible RTs removed) are saved
# to .Rda files and simply reloaded on subsequent runs.
if( file.exists('dat.Rda') && file.exists('dd.Rda') ) {
load('dat.Rda'); load('dd.Rda')
} else {
gatherData = function(number_of_valid_subjects) {
  # Read and row-bind the per-subject data files
  # ../experimentCode/output/subject<k>.data for k = 1..number_of_valid_subjects.
  # Returns a single data frame containing all subjects' trials.
  # (The previous version grew the result with rbind() inside the loop --
  # quadratic copying -- and seeded it with a zero-row frame read from
  # subject1.data; building a list and binding once avoids both.)
  data_dir <- '../experimentCode/output/'
  per_subject <- lapply(seq_len(number_of_valid_subjects), function(subject) {
    current.filename <- paste(data_dir, 'subject', subject, '.data', sep='')
    read.table(file=current.filename, header=TRUE, stringsAsFactors = FALSE)
  })
  return(do.call(rbind, per_subject))
} # end of gatherData function
classifyResponse = function(dat) {
  # For each trial, derive:
  #   (a) the dot counts playing the roles expected (Exp_Num), borderline
  #       (Bline_Num) and extreme (Extr_Num);
  #   (b) which screen side (left/mid/right) each of those counts appeared on;
  #   (c) the count and side the subject actually chose; and
  #   (d) the category (expected/borderline/extreme) of the subject's choice.
  #
  # The Exp/Bline/Extr mapping depends only on Item and Quantity -- the four
  # Conditions share one identical mapping (verified against the original
  # explicit Con1..Con4 table): Item k uses the counts (k-1)*10 + c(6, 15, 24),
  # and under Quantity 2 the expected and extreme counts swap roles.
  # Rows outside the design (Condition/Quantity/Item not in range) get NA,
  # exactly as the original lookup left them.
  valid <- dat$Condition %in% 1:4 & dat$Quantity %in% 1:2 & dat$Item %in% 1:4
  base <- (dat$Item - 1) * 10
  dat$Exp_Num   <- ifelse(valid, ifelse(dat$Quantity == 1, base + 6,  base + 24), NA)
  dat$Bline_Num <- ifelse(valid, base + 15, NA)
  dat$Extr_Num  <- ifelse(valid, ifelse(dat$Quantity == 1, base + 24, base + 6), NA)
  # What side LEFT, MIDDLE, RIGHT corresponds with Expected, Borderline, Extreme?
  # which() drops NA comparisons so unmatched rows stay NA.
  for (role in c('Exp', 'Bline', 'Extr')) {
    num <- dat[[paste0(role, '_Num')]]
    side <- rep(NA_character_, nrow(dat))
    side[which(num == dat$Left)]  <- 'left'
    side[which(num == dat$Mid)]   <- 'mid'
    side[which(num == dat$Right)] <- 'right'
    dat[[paste0(role, '_side')]] <- side
  }
  # what button press did the subject actually make? LEFT, MIDDLE, RIGHT, NOANSWER?
  dat$RESPONSE <- as.factor(dat$RESPONSE)
  # what number of dots corresponds with the subject's button press?
  # (NA for NOANSWER, as in the original per-row switch())
  resp <- as.character(dat$RESPONSE)
  dat$response_num <- rep(NA_real_, nrow(dat))
  dat$response_num[which(resp == 'LEFT')]   <- dat$Left[which(resp == 'LEFT')]
  dat$response_num[which(resp == 'MIDDLE')] <- dat$Mid[which(resp == 'MIDDLE')]
  dat$response_num[which(resp == 'RIGHT')]  <- dat$Right[which(resp == 'RIGHT')]
  # what side was the subject's button-press? Left, mid, right?
  # ("noanswer" is excluded from the factor levels and becomes NA)
  dat$response_side <- tolower(dat$RESPONSE)
  dat$response_side[dat$response_side == "middle"] <- 'mid'
  dat$response_side <- factor(dat$response_side, exclude="noanswer")
  # what category was the subject's response? Expected, Borderline, Extreme
  # Only rows complete across all columns are categorised (the original
  # iterated over row.names(na.omit(dat))); NOANSWER trials stay "nocat",
  # which the factor(exclude=) call below turns into NA.
  dat$response_category <- "nocat"
  ok <- complete.cases(dat)
  dat$response_category[which(ok & dat$response_num == dat$Exp_Num)]   <- 'expected'
  dat$response_category[which(ok & dat$response_num == dat$Bline_Num)] <- 'borderline'
  dat$response_category[which(ok & dat$response_num == dat$Extr_Num)]  <- 'extreme'
  dat$response_category <- factor(dat$response_category, exclude="nocat")
  dat$RESPONSE <- NULL
  return(dat)
} # end of classifyResponse function
# declare local variables
number_of_valid_subjects <- 30 # = 30
number_of_rows <- 7680 # 7680
number_of_trials_per_subject <- number_of_rows / number_of_valid_subjects # = 256
dat <- gatherData(number_of_valid_subjects) # = 30
dat <- classifyResponse(dat) # classify the response as expected, near, or far
# SUBJECT
# zero-padded factor labels (s01..s30) so that sorting is lexicographic-safe
dat$Subject=factor(paste("s",sprintf("%02d",dat$Subject),sep=""))
# TRIAL
# NOTE(review): assumes the gathered rows are ordered subject-by-subject with
# exactly 256 consecutive rows per subject -- TODO confirm against the raw files
dat$Trial = rep(x = 1:number_of_trials_per_subject, times = number_of_valid_subjects)
# make a centred Trial for modeling
dat$c_Trl <-dat$Trial - mean(dat$Trial)
# make a scaled Trial for modelling
dat$s_Trl <- as.numeric(scale(dat$Trial))
# ID
# id is a unique identifier for the 7680 row data
# (subject label + zero-padded trial number, e.g. "s01:t001")
dat$id <- factor(paste(paste(dat$Subject),
paste("t", sprintf("%03d", dat$Trial), sep="") , sep=":"))
# ITEM
# create a centred numeric item variable for modeling
# (evenly spaced, mean-zero codes -.75, -.25, .25, .75 for items 1..4)
dat$c_Itm <- ifelse(dat$Item==1, -.75,
ifelse(dat$Item==2, -.25,
ifelse(dat$Item==3, .25, .75)))
# make Item be a factor and assign labels
# (labels are the expected:borderline:extreme dot triplets)
dat$Item <- factor(dat$Item, levels=c(1,2,3,4), labels=c("06:15:24", "16:25:34", "26:35:44", "36:45:54"))
# ITEM RATIOS
# Ratio descriptors of each item triplet, computed once per item and then
# mapped onto rows by Item level. Item was converted to a factor just above,
# so as.integer(dat$Item) gives the level index 1..4.
# FIX: the per-row values were previously hand-transcribed rounded constants
# duplicating these computed vectors (drift risk and precision loss); index
# the computed vectors directly instead.
item_range_ratio = c(6 / 24, 16 / 34, 26 / 44, 36 / 54)
# 0.2500000 0.4705882 0.5909091 0.6666667
item_range_ratio_scaled = as.vector(scale(item_range_ratio))
# -1.3441995 -0.1316642 0.5297187 0.9461450
item_mean_ratio = c(mean(c(6 / 15, 15 / 24)), mean(c(16 / 25, 25 / 34)), mean(c(26 /35, 35 / 44)), mean(c(36 / 45, 45 / 54)))
# 0.5125000 0.6876471 0.7691558 0.8166667
item_mean_ratio_scaled = as.vector(scale(item_mean_ratio))
# -1.37582241 -0.06614191 0.54334858 0.89861574
item_idx <- as.integer(dat$Item)
dat$item_range_ratio <- item_range_ratio[item_idx]
dat$item_range_ratio_scaled <- item_range_ratio_scaled[item_idx]
dat$item_mean_ratio <- item_mean_ratio[item_idx]
dat$item_mean_ratio_scaled <- item_mean_ratio_scaled[item_idx]
# VAGUENESS
# Create a factor coding for Vagueness
# (Conditions 1 and 3 are Vague; 2 and 4 are Crisp)
dat[ dat$Condition==1 , 'Vagueness'] <- 'Vague'
dat[ dat$Condition==2 , 'Vagueness'] <- 'Crisp'
dat[ dat$Condition==3 , 'Vagueness'] <- 'Vague'
dat[ dat$Condition==4 , 'Vagueness'] <- 'Crisp'
dat$Vagueness <- as.factor(dat$Vagueness)
# manually center Vagueness (deviation coding: Crisp = -.5, Vague = +.5)
dat$c_Vag <- ifelse(dat$Vagueness=='Crisp', -.5, .5)
# NUMBER
# Create a factor coding for Number use
# (Conditions 1 and 2 are Numeric; 3 and 4 are Verbal)
dat[ dat$Condition==1 , 'Number'] <- 'Numeric'
dat[ dat$Condition==2 , 'Number'] <- 'Numeric'
dat[ dat$Condition==3 , 'Number'] <- 'Verbal'
dat[ dat$Condition==4 , 'Number'] <- 'Verbal'
dat$Number <- as.factor(dat$Number)
# manually center Number (deviation coding: Numeric = -.5, Verbal = +.5)
dat$c_Num <- ifelse(dat$Number=='Numeric', -.5, .5)
# CONDITION
# make a factor out of Condition, as f_Cnd
# (labels combine the Vagueness and Number codings above)
dat$f_Cnd <- factor(dat$Condition, levels=c(1,2,3,4), labels=c('Vg:Nm', 'Cr:Nm', 'Vg:Vb', 'Cr:Vb'))
# ORDER
# give the levels of Order meaningful names
dat$Order <- factor(dat$Order, levels=c(1,2), labels=c('LtoR','RtoL'))
# make a manually centred Order
dat$c_Ord <- ifelse(dat$Order=="LtoR",-.5,.5)
# QUANTITY
# give the levels of Quantity meaningful names
dat$Quantity <- factor(dat$Quantity, levels=c(1,2), labels=c('Small','Large'))
# make a manually centred Quantity
dat$c_Qty <- ifelse(dat$Quantity=="Small",-.5,.5)
# INSTRUCTION
# add number of characters in the instruction # 29 30 34 36 38
dat$nchar_instr = nchar(dat$Instruction)
dat$nchar_instr_scaled = as.vector(scale(nchar(dat$Instruction), scale=TRUE))
# make Instruction be a factor (17 levels)
dat$Instruction <- as.factor(dat$Instruction)
# RT
# add transformations of RT
# (the Box-Cox / power ladder demonstrated at the top of the file:
#  lambda = -2, -1, -0.5, 0 (log), 0.5, 1, 2)
dat$RT_RecSqd <- 1/dat$RT^2
dat$RT_Rec <- 1/dat$RT
dat$RT_RecSqt <- 1/sqrt(dat$RT)
dat$RT_log <- log(dat$RT)
dat$RT_sqt <- sqrt(dat$RT)
dat$RT_raw <- dat$RT
dat$RT_sqd <- dat$RT^2
# print to file a table with information about the design
design_info <- unique(subset(dat, select=c(Item, Condition, Vagueness, Number, Quantity, Order,
Left, Mid, Right, Exp_Num, Bline_Num, Extr_Num, Exp_side, Bline_side, Extr_side,Instruction)))
# FIX: the sorted table was previously assigned to `design.info` while the
# unsorted `design_info` was written out, so the ordering was discarded;
# keep one name throughout so the file is actually sorted.
design_info <- design_info[order(design_info$Item, design_info$Condition, design_info$Quantity, design_info$Order),]
row.names(design_info) <- NULL
capture.output(print.data.frame(design_info, row.names=F, print.gap=3, quote=F, right=F),
file="design_info.table")
# put dat in better column order
dat <- subset(dat,
select = c(id, Subject, Trial, Condition, Order, Quantity, Vagueness, Number,
Item, item_range_ratio, item_range_ratio_scaled, item_mean_ratio, item_mean_ratio_scaled,
c_Trl, s_Trl, c_Itm, c_Vag, c_Num, f_Cnd, c_Ord, c_Qty,
RT, RT_RecSqd, RT_Rec, RT_RecSqt, RT_log, RT_sqt, RT_raw, RT_sqd,
Exp_Num, Bline_Num, Extr_Num, Exp_side, Bline_side, Extr_side, response_num, response_side, response_category, Left, Mid, Right,
Instruction, nchar_instr
))
# save dat as dat.Rda
# This data set contains *all* trials 7680 including impossible trials and is
# mainly for graphs comparing different removals
save(dat, file="dat.Rda")
# remove impossible trials from dat in new data dd #
# Throw out RT = 1 and RT = 59998, and RTprev = 1 and RTprev = 59998
# i.e., throw out sticky fingers and timeouts,
# and the trials that followed sticky fingers and timeouts
# since they were likely affected by unusual previous trials.
# lose impossible trials
dd <- dat
dd$RT[dd$RT == 1 ] <- NA
dd$RT[dd$RT == 59998 ] <- NA
# NOTE(review): complete.cases() drops rows with NA in ANY column, so trials
# with a NOANSWER response (NA response_num/side/category) are removed here
# too, not only the RT == 1 / 59998 trials -- confirm this is intended.
dd <- dd[complete.cases(dd),]
row.names(dd) <- NULL
# add preceding RT: because we removed impossible trials, the value for preceding RT for a trial following an impossible trial is the value of the trial that preceded the impossible trial.
# RTprev = the RT of the previous remaining trial within the same subject;
# each subject's first remaining trial uses its own RT (the original per-row
# loop seeded i == 1 with RT[1]).
# ave() applies the one-position shift per subject in row order. The previous
# nested loop re-subset the whole frame once per trial (quadratic) and its
# `for (i in 1:nrows)` would misbehave for a subject level with zero rows.
dd$RTprev <- ave(dd$RT, dd$Subject,
                 FUN = function(x) if (length(x) == 0) x else c(x[1], x[-length(x)]))
# add transformations of previous RT
# (same power ladder as for RT above)
dd$RTprev_RecSqd <- 1/dd$RTprev^2
dd$RTprev_Rec <- 1/dd$RTprev
# NOTE(review): unlike RT_RecSqt, the RTprev_RecSqt used below is
# standardised via scale(); the unscaled version is kept as RTprev_RecSqt0
# (which is later dropped by the dd column selection) -- confirm intended.
dd$RTprev_RecSqt0 <- 1/sqrt(dd$RTprev)
dd$RTprev_RecSqt <- as.vector(scale( 1/sqrt(dd$RTprev) ))
dd$RTprev_log <- log(dd$RTprev)
dd$RTprev_sqt <- sqrt(dd$RTprev)
dd$RTprev_raw <- dd$RTprev
dd$RTprev_sqd <- dd$RTprev^2
# number of higher-level cells in the design (abstracting over Order) =
# Item(4) * Number(2) * Vagueness(2) * Quantity(2) = 32
# number of measurements in each cell = 8
# 32 * 8 = 256;  256 * 30 = 7680
#
# `cell` numbers the Item x Number x Vagueness x Quantity combinations with
# Quantity varying fastest and Item slowest -- the same enumeration order as
# the original nested loops -- so it is a fixed mixed-radix index of the four
# factor levels. `measurement` counts, per subject, the occurrences within a
# cell in row order. Both are computed vectorized; the original quintuple
# loop re-subset the whole frame for every single trial (quadratic).
nN <- nlevels(dd$Number); nV <- nlevels(dd$Vagueness); nQ <- nlevels(dd$Quantity)
dd$cell <- (as.integer(dd$Item) - 1) * nN * nV * nQ +
  (as.integer(dd$Number) - 1) * nV * nQ +
  (as.integer(dd$Vagueness) - 1) * nQ +
  as.integer(dd$Quantity)
# within each subject x cell group, number the rows 1, 2, ... in order of appearance
dd$measurement <- ave(seq_len(nrow(dd)), dd$Subject, dd$cell, FUN = seq_along)
# put dd in better column order
# NOTE(review): RTprev_RecSqt0 (the unscaled reciprocal square root) is not
# in this selection and is therefore dropped from dd -- confirm intended.
dd <- subset(dd,
select = c(id, Subject, Trial, Condition, Order, Quantity, Vagueness, Number,
Item, item_range_ratio, item_range_ratio_scaled, item_mean_ratio, item_mean_ratio_scaled,
c_Trl, s_Trl, c_Itm, c_Vag, c_Num, f_Cnd, c_Ord, c_Qty,
RT, RT_RecSqd, RT_Rec, RT_RecSqt, RT_log, RT_sqt, RT_raw, RT_sqd,
RTprev, RTprev_RecSqd, RTprev_Rec, RTprev_RecSqt, RTprev_log, RTprev_sqt, RTprev_raw, RTprev_sqd,
Exp_Num, Bline_Num, Extr_Num, Exp_side, Bline_side, Extr_side, response_num, response_side, response_category, Left, Mid, Right,
Instruction, nchar_instr,
cell, measurement
))
# cache the trimmed data set for future runs (see the guard at the top)
save(dd, file="dd.Rda")
} # end of the else clause, which is run if both of the files dat.Rda and dd.Rda do not exist.
Compare distributions of the various transformations of RT against random samples from normal distributions with the same mean and sd to see which transformations best approximate normal distributions.
## plot the comparison of the transformations against their normal equivalents to see distributional properties using full data
# first create a data frame specially for transformations
# this won't be used for analysis, just for this plot
dat_transforms <- dat
# dfSubset says whether any data points were removed
dat_transforms$dfSubset <- "original"
# RTtype says whether the row contains data for an observed RT or
# a sample from a normal distribution with the same mean and sd
dat_transforms$RTtype <- "observed"
# create extra rows for dfs with removed (outlier) data points
dat1 <- dat_transforms # nrow 7680
dat2 <- subset(dat_transforms, RT>1) # nrow 7678
dat2$dfSubset <- "RT>1"
dat3 <- subset(dat_transforms, RT>1 & RT<59999) # nrow 7677
dat3$dfSubset <- "RT>1&RT<59999"
# NOTE(review): RT<59999 is redundant below given RT<30001; the label also
# omits it, so only the RT<30001 bound is effective.
dat4 <- subset(dat_transforms, RT>1 & RT<59999 & RT<30001) # nrow 7658
dat4$dfSubset <- "RT>1&RT<30001"
# bring the dfs back into main df
dat_transforms <- rbind(dat1, dat2, dat3, dat4)
dat_transforms$dfSubset <- factor(dat_transforms$dfSubset, levels=c("original", "RT>1", "RT>1&RT<59999", "RT>1&RT<30001"))
# create extra rows for normally distributed transformed variables
dato = dat_transforms
datn = dat_transforms; datn$RTtype <- "normal"
dat_transforms <- rbind(dato,datn)
dat_transforms$RTtype <- factor(dat_transforms$RTtype, levels=c("observed","normal"))
# for safety
# (blank out the "normal" copies so they cannot leak observed values;
# they are refilled with random draws in the loop below)
dat_transforms[dat_transforms$RTtype=="normal", c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd")] <- NA
# Create the normally distributed transformed variables, replacing the NAs declared above
# For every data subset and every RT transformation, draw a random normal
# sample with the same n, mean and sd as the observed values, so each observed
# distribution can be compared against its normal-theory equivalent.
# (The original repeated the same four-line rnorm() block verbatim for each of
# the seven variables; looping over the column names in the same order keeps
# the RNG draw sequence identical for a given seed.)
rt_transform_cols <- c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd")
for (k in levels(dat_transforms$dfSubset)){
  for (v in rt_transform_cols) {
    observed <- dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="observed", v]
    dat_transforms[dat_transforms$dfSubset==k & dat_transforms$RTtype=="normal", v] <-
      rnorm(n=length(observed), mean=mean(observed), sd=sd(observed))
  }
} # end of create normally distributed equivalents for the data transforms.
# Long format: one row per (subset, RT type, transformation, score) so the
# plot can facet by subset and transformation.
temp <- reshape2::melt(dat_transforms,
id.vars=c("dfSubset", "RTtype", "Item", "Vagueness", "Number", "Quantity", "Order"),
measure.vars=c("RT_RecSqd", "RT_Rec", "RT_RecSqt", "RT_log", "RT_sqt", "RT_raw", "RT_sqd"),
variable.name="transformation",
value.name="score")
# Frequency polygons: observed (blue) vs matched normal samples (red),
# one panel per subset x transformation.
ggplot(temp, aes(score, colour=RTtype)) +
geom_freqpoly() +
facet_wrap(dfSubset~transformation, scales="free", nrow=4) +
theme_bw() +
theme(aspect.ratio=1) +
scale_color_manual(values=c("blue","red"))
Show how transformations of RT affect distribution of times, and how they affect which times are outliers.
# allow reference to transformation
subdata = reshape2::melt(dd,
measure.vars=c("RT_Rec", "RT_RecSqt", "RT_log", "RT_raw"),
variable.name="transformation",
value.name="value")
# boxplots for subjects and items separately for each dependent variable
# (two boxplot layers share the x axis: items in red, subjects in blue;
# how far out the outliers sit depends on the transformation)
ggplot(subdata) +
facet_wrap(~ transformation, scales="free", ncol=4) +
geom_boxplot(aes(y=value, x=Item, col="Items")) +
geom_boxplot(aes(y=value, x=Subject, col="Subjects")) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1), aspect.ratio=1, panel.grid=element_blank()) +
scale_colour_manual("",values=c("Items"="red","Subjects"="blue")) +
xlab("") + ylab("")
Show how mean times for individual subjects and items vary with respect to the grand mean RT.
# Show how mean times for individual subjects and items vary with respect to the grand mean RT.
# Per-subject (and per-item) means vs the grand mean. `mean(dd$RT)` inside
# summarise deliberately refers to the whole data frame (the grand mean,
# emean), while mean(RT) is the group mean (lmean); ldiff/lsign are the
# signed deviations.
subs=ddply(dd, .(level=Subject),
summarise, effect="subject", emean=mean(dd$RT), lmean=mean(RT),
ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0, "slow", "fast"))
itms=ddply(dd, .(level=Item),
summarise, effect="item", emean=mean(dd$RT), lmean=mean(RT),
ldiff=mean(RT)-mean(dd$RT), lsign=ifelse(mean(RT)-mean(dd$RT)>0, "slow", "fast"))
si=rbind(subs,itms)
si$effect <- factor(si$effect, levels=c("subject", "item"))
si$lsign <- factor(si$lsign, levels=c("slow","fast"))
# Left/first scale: group means, with spikes from the grand-mean line to each
# group mean, coloured by whether the group is slower or faster than average.
my1 <- xyplot(data=si, lmean~level | effect,
aspect=1,
layout=c(2,1),
ylim = extendrange(si$lmean, f = 0.10),
# NOTE(review): trellis.par.set() is called here for its side effect inside
# the par.settings list; par.settings normally takes a plain settings list --
# confirm this styles the plot as intended.
par.settings=list(trellis.par.set(superpose.symbol = list(col = c("blue","red"), pch=19),
superpose.line=list(col = c("blue","red")))),
groups=si$lsign,
xlab=NULL,
ylab="RT (in ms)",
key=list(text=list(levels(si$lsign)),
lines=list(lty=c(1,1), col=trellis.par.get('superpose.line')$col,
pch=c(19,19), type='o'), divide=1),
# NOTE(review): `mean=` is not an xyplot() argument; it is passed through
# `...` (the panel reads unique(si$emean) directly instead) -- likely unused.
mean=unique(si$emean),
scales=list(x=list(relation="free", at=list(15.5, 32.5),
labels=list("subject number", "item identifier"), cex=1 ),
y=list(at=c(0, 2000, unique(si$emean), 4000, 6000, 8000, 10000),
labels=c(0, 2000, "Mean", 4000, 6000, 8000, 10000), cex=.9) ),
panel=function(x,y,type,subscripts,groups=groups,...){
panel.xyplot(x,y,type='p',groups=groups, subscripts=subscripts,...)
# zero-length arrows = vertical spikes from the grand mean to each group mean
panel.arrows(x0=x, y0=unique(si$emean), x1=x, y1=y, groups=groups, subscripts=subscripts,
code=3, length=0, col=trellis.par.get('superpose.line')$col[groups][subscripts], ...)
panel.abline(h=unique(si$emean), col="lightgrey",lty=1)
# label points: numeric subject codes in panel 1, item labels in panel 2
panel.text(x, y=(ifelse(y<unique(si$emean),y-300,y+300)),
as.numeric(si$level)[subscripts][panel.number()==1], cex=.75)
panel.text(x, y=(ifelse(y<unique(si$emean),y-300,y+300)),
as.character(si$level)[subscripts][panel.number()==2], cex=.75)
}) # end of my1
# Second/right scale: the same data expressed as deviations from the grand mean.
my2 <- xyplot(data=si, ldiff~level | effect,
aspect=1, type='n',
ylab="Deviation from mean RT (in ms)",
ylim=extendrange(si$ldiff,f=0.10)
) # creates a second y-axis scale
print(doubleYScale(my1, my2,
add.axis=TRUE,
add.ylab2=TRUE,
use.style=FALSE))
Show how mean times for individual subjects and items vary with respect to the grand mean RT_RecSqt.
# Show how mean times for individual subjects and items vary with respect to the grand mean RT_RecSqt.
# Same double-y-scale plot as above, on the RT_RecSqt scale. Note the
# comparison is reversed (`< 0` means "slow") because 1/sqrt(RT) is a
# decreasing transform: larger RTs give smaller RT_RecSqt values.
subs <- ddply(dd, .(level=Subject), summarise,
effect="subject", emean=mean(dd$RT_RecSqt), lmean=mean(RT_RecSqt),
ldiff=mean(RT_RecSqt)-mean(dd$RT_RecSqt),
lsign=ifelse(mean(RT_RecSqt)-mean(dd$RT_RecSqt)<0,"slow","fast"))
itms <- ddply(dd, .(level=Item), summarise, effect="item", emean=mean(dd$RT_RecSqt), lmean=mean(RT_RecSqt),
ldiff=mean(RT_RecSqt)-mean(dd$RT_RecSqt),
lsign=ifelse(mean(RT_RecSqt)-mean(dd$RT_RecSqt)<0,"slow","fast"))
si <- rbind(subs,itms)
si$effect <- factor(si$effect, levels=c("subject", "item"))
si$lsign <- factor(si$lsign, levels=c("slow","fast"))
my1 <- xyplot(data=si, lmean~level | effect,
aspect=1,
layout=c(2,1),
ylim = extendrange(si$lmean, f = 0.10),
# NOTE(review): trellis.par.set() called for side effect inside par.settings,
# as in the RT plot above -- confirm intended.
par.settings = list(trellis.par.set(superpose.symbol = list(col = c("blue","red"), pch=19), superpose.line=list(col = c("blue","red")))),
groups = si$lsign,
xlab = NULL,
ylab = "RT_RecSqt: reciprocal of square root of RT in ms\n",
key=list(text=list(levels(si$lsign)),
lines=list(lty=c(1,1),
col=trellis.par.get('superpose.line')$col,
pch=c(19,19), type='o'),
divide=1),
mean = unique(si$emean),
scales = list(x=list(relation="free", at=list(15.5, 32.5),
labels=list("subject number", "item identifier"), cex=1 ) ),
panel=function(x,y,type,subscripts,groups=groups,...){
panel.xyplot(x,y,type='p',groups=groups, subscripts=subscripts,...)
# zero-length arrows = vertical spikes from the grand mean to each group mean
panel.arrows(x0=x, y0=unique(si$emean), x1=x, y1=y,
groups=groups, subscripts=subscripts, code=3, length=0, col=trellis.par.get('superpose.line')$col[groups][subscripts], ...)
panel.abline(h=unique(si$emean), col="lightgrey",lty=1)
# label points: numeric subject codes in panel 1, item labels in panel 2
panel.text(x, y=(ifelse(y<unique(si$emean),y-.001,y+.001)),
as.numeric(si$level)[subscripts][panel.number()==1], cex=.75)
panel.text(x, y=(ifelse(y<unique(si$emean),y-.001,y+.001)),
as.character(si$level)[subscripts][panel.number()==2], cex=.75)
})
# second y-axis: deviations from the grand mean on the RT_RecSqt scale
my2 <- xyplot(data=si, ldiff~level | effect,
aspect=1, type='n',
ylab="\nDeviation from mean RT_RecSqt (in ms)",
ylim=extendrange(si$ldiff,f=0.10))
print(doubleYScale(my1, my2, add.axis=TRUE, add.ylab2=TRUE, use.style=FALSE))
Show how practice and fatigue effects vary over subjects
# Show how practice/fatigue effects vary over subjects:
# one panel per subject, one point per trial, red loess curve shows drift
# in RT_RecSqt over the course of the session.
ggplot(dd, aes(y=RT_RecSqt, x=Trial)) +
facet_wrap(~Subject,nrow=3) +
theme(panel.grid=element_blank(),
# panel.margin was deprecated in ggplot2 2.2.0 and is now defunct;
# panel.spacing is the supported equivalent
panel.spacing = unit(0, "lines"),
panel.background = element_rect(fill = 'white', colour='black' ),
legend.position="top") +
scale_x_continuous("Trial", breaks=NULL) +
geom_point(pch=19, cex=.25, alpha=.5) +
stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) +
theme(aspect.ratio = 1)
Show how practice and fatigue effects vary over items.
# Show how practice/fatigue effects vary over items:
# one panel per item, loess curve tracks RT_RecSqt drift across trials.
ggplot(dd, aes(y=RT_RecSqt, x=Trial)) +
facet_grid(~Item) +
# panel.margin was deprecated in ggplot2 2.2.0 and is now defunct;
# panel.spacing is the supported equivalent
theme(panel.grid=element_blank(), panel.spacing = unit(0, "lines"),
panel.background = element_rect(fill = 'white', colour='black' ),
legend.position="top") +
scale_x_continuous("Trial") +
geom_point(pch=19, cex=.25, alpha=.5) +
stat_smooth(fill='lightblue',col="red",lwd=.5, show.legend=TRUE) +
theme(aspect.ratio = 1)
Main effects
Plot main effects on several transformations
# plot main effects on observed data
# Double melt: first make the four RT transformations a variable...
# allow reference to transformation
temp <- reshape2::melt(dd,
id.vars=c("Item", "Vagueness", "Number", "Quantity", "Order"),
measure.vars=c("RT_Rec", "RT_RecSqt", "RT_log", "RT_raw"),
variable.name="transformation",
value.name="score")
# ...then make the five design factors a variable, giving one row per
# (transformation, effect, level, score) combination.
# allow reference to effect
temp2 <- reshape2::melt(temp,
id.vars=c("transformation", "score"),
measure.vars=c("Item", "Vagueness", "Number", "Quantity", "Order"),
variable.name="effect",
value.name="level")
# do the plot: grid of transformation (rows of 5) by effect, mean +- normal CI
ggplot(temp2, aes(y=score, x=level, group=1, col=effect)) +
facet_wrap(~transformation + effect, scales="free", shrink=TRUE, ncol=5) +
stat_summary(fun.data=mean_cl_normal, geom="pointrange") +
# NOTE(review): fun.y was deprecated in ggplot2 3.3.0 in favour of fun= —
# confirm the installed ggplot2 version before running
stat_summary(fun.y=mean, geom="line") +
theme(aspect.ratio=1, axis.text.y=element_blank())
Main effects
# prep main effects rts data frame
# One within-subject summary per design factor; column 3 (the factor itself)
# is renamed "Levels" so the frames can be stacked and faceted by Effect.
vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Vagueness")));names(vrts)[3]<-"Levels"
nrts <- cbind(Effect="Number",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number")));names(nrts)[3]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Quantity")));names(qrts)[3]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Order")));names(orts)[3]<-"Levels"
# NOTE(review): irts is computed but NOT included in the rbind below —
# either dead code or an accidentally omitted Item panel; confirm intent.
irts <- cbind(Effect="Item",x=factor(c(1,2,3,4)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Item")));names(irts)[3]<-"Levels"
rts=rbind(vrts,nrts,qrts,orts)
# error-bar limits: mean +- within-subject 95% CI
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
# plot main effects
ggplot(rts, aes(y=RT_RecSqt, x=Levels, group=1, ymin=mins, ymax=maxs)) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_grid(~Effect, scale="free_x") +
theme_bw() +
theme(aspect.ratio=1)
Main effects over items
# Main effects resolved over items: each factor crossed with item_mean_ratio
# (the items' mean dot ratio), so item difficulty appears on the x axis.
# Column 4 (the factor) is renamed "Levels" for stacking.
vrts <- cbind(Effect="Vagueness",x=factor(c(1,2)),summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Vagueness")));names(vrts)[4]<-"Levels"
nrts <- cbind(Effect="Number",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Number")));names(nrts)[4]<-"Levels"
qrts <- cbind(Effect="Quantity",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Quantity")));names(qrts)[4]<-"Levels"
orts <- cbind(Effect="Order",x=factor(c(1,2)), summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio","Order")));names(orts)[4]<-"Levels"
rts=rbind(vrts,nrts,qrts,orts)
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
# summarySEwithin returned the ratio as a factor; recover the numeric value
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
ggplot(rts,
aes(y=RT_RecSqt, x=item_mean_ratio, group=Levels, ymin=mins,
ymax=maxs, shape=Levels, fill=Levels, col=Levels)) +
# 8 level entries: 2 levels for each of the 4 factors, paired by shape/colour
scale_shape_manual(values=c(21,21,22,22,23,23,24,24)) +
scale_fill_manual(values=c("black", "gray", "black", "gray", "black", "gray", "black", "gray")) +
scale_color_manual(values=c("black", "gray", "black", "gray", "black", "gray", "black", "gray")) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_grid(~Effect) +
theme_bw() +
theme(aspect.ratio=1,
panel.grid=element_blank()) +
scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1))
2 way interactions not involving items.
# prep 2 ways rts data frame
# Build a within-subject summary for every 2-way interaction among the four
# design factors (the six pairs, excluding Item), then stack them with a
# generic F1/F2 column naming and an "A:B" Effect label for faceting.
factor_pairs <- list(c("Vagueness", "Number"),
                     c("Vagueness", "Quantity"),
                     c("Vagueness", "Order"),
                     c("Number", "Quantity"),
                     c("Number", "Order"),
                     c("Quantity", "Order"))
rts <- do.call(rbind, lapply(factor_pairs, function(pair) {
  tab <- summarySEwithin(dd, measurevar = "RT_RecSqt", withinvars = pair)
  names(tab)[1:2] <- c("F1", "F2")
  # each 2x2 summary has exactly four rows
  tab$Effect <- rep(paste(pair, collapse = ":"), 4)
  tab
}))
rts <- subset(rts, select = c(Effect, F1, F2, RT_RecSqt, RT_RecSqt_norm, sd, se, ci))
# error-bar limits: mean +- within-subject 95% CI
rts$mins <- rts$RT_RecSqt - rts$ci
rts$maxs <- rts$RT_RecSqt + rts$ci
# plot 2 way interactions
ggplot(rts, aes(y = RT_RecSqt, x = F2, group = F1, ymin = mins, ymax = maxs, pch = F1)) +
  geom_line() +
  geom_errorbar(width = .1) +
  geom_point(cex = 3) +
  facet_wrap(~Effect, scale = "free_x", nrow = 1) +
  theme_bw() +
  theme(aspect.ratio = 1) +
  theme(legend.title = element_blank()) +
  xlab("Levels")
Plot 2 way interactions over items
# prep 2 ways over items rts data frame
# Each pair is summarised together with item_mean_ratio; columns 2-3 are the
# two factors (renamed F1/F2), and E1/E2 record which factors they were so
# the final plot can facet E2 ~ E1. Note the pairings are NOT all ordered the
# same way (e.g. rts4 is Quantity then Number) — order here drives panel placement.
rts1 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Vagueness","Number")); names(rts1)[2] <- "F1"; names(rts1)[3] <- "F2"
rts1$E1 <- "Vagueness"
rts1$E2 <- "Number"
rts2 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Vagueness","Quantity")); names(rts2)[2] <- "F1"; names(rts2)[3] <- "F2"
rts2$E1 <- "Vagueness"
rts2$E2 <- "Quantity"
rts3 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Vagueness","Order")); names(rts3)[2] <- "F1"; names(rts3)[3] <- "F2"
rts3$E1 <- "Vagueness"
rts3$E2 <- "Order"
rts4 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Quantity", "Number")); names(rts4)[2] <- "F1"; names(rts4)[3] <- "F2"
rts4$E1 <- "Quantity"
rts4$E2 <- "Number"
rts5 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Order","Number")); names(rts5)[2] <- "F1"; names(rts5)[3] <- "F2"
rts5$E1 <- "Order"
rts5$E2 <- "Number"
rts6 <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("item_mean_ratio", "Quantity","Order")); names(rts6)[2] <- "F1"; names(rts6)[3] <- "F2"
rts6$E1 <- "Quantity"
rts6$E2 <- "Order"
rts <- rbind(rts1,rts2,rts3,rts4,rts5,rts6)
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
# recover numeric ratio (summarySEwithin factorised it)
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
# F3: one trace per F1-by-F2 level combination
rts$F3 <- factor(paste(rts$F1,rts$F2))
# shared level order so empty facet cells line up in the E2 ~ E1 grid
rts$E1 <- factor(rts$E1, levels=c("Number","Order","Quantity","Vagueness"))
rts$E2 <- factor(rts$E2, levels=c("Number","Order","Quantity","Vagueness"))
# The palette with black (colour-blind friendly):
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
ggplot(rts, aes(y=RT_RecSqt, x=item_mean_ratio, group=F3, col=F2, pch=F1)) +
facet_grid(E2~E1, drop=F) +
geom_point(cex=3) +
geom_line() +
theme_bw() +
theme(aspect.ratio=1, panel.grid=element_blank()) +
scale_color_manual(name="colour",values=cbbPalette) +
scale_shape_discrete(name="shape") +
scale_x_continuous(breaks = round(sort(unique(rts$item_mean_ratio)),1))
Plot vagueness by number interaction over items
# Vagueness-by-Number interaction resolved over the four items, with the
# four Number/Vagueness traces dodged slightly so error bars do not overlap.
rts <- summarySEwithin(dd, measurevar = "RT_RecSqt", withinvars = c("Number", "Vagueness", "Item"))
rts$condition <- paste(rts$Number, rts$Vagueness, sep = " ")
dodge <- position_dodge(width = 0.2)
ggplot(rts,
       aes(y = RT_RecSqt, x = Item, ymin = RT_RecSqt - ci, ymax = RT_RecSqt + ci,
           group = condition, shape = condition, fill = condition)) +
  geom_line(position = dodge) +
  geom_errorbar(width = .2, position = dodge) +
  geom_point(size = 4, position = dodge) +
  # squares for one Number level, circles for the other; fill marks Vagueness
  scale_shape_manual("", values = c(22, 22, 21, 21)) +
  scale_fill_manual("", values = c("black", "white", "black", "white")) +
  ggtitle("Response time") +
  ylab("RT_RecSqt") +
  xlab("") +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.key = element_blank(), aspect.ratio = 1,
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(~Number)
3 way interactions
# Four 3-way interaction panels (p1..p4), one per triple of design factors,
# arranged 2x2 at the end. Each reuses `rts`, so build and plot are paired.
# p1: Number x Vagueness x Quantity
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Vagueness","Quantity"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p1=ggplot(rts, aes(y=RT_RecSqt, x=Number, group=Vagueness, ymin=mins, ymax=maxs, pch=Vagueness)) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_grid(~Quantity, scale="free_x") +
theme_bw() +
theme(aspect.ratio=1) +
theme(legend.title=element_blank(), legend.position="top")
# p2: Number x Vagueness x Order
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Vagueness","Order"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p2=ggplot(rts, aes(y=RT_RecSqt, x=Number, group=Vagueness, ymin=mins, ymax=maxs, pch=Vagueness)) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_grid(~Order, scale="free_x") +
theme_bw() +
theme(aspect.ratio=1) +
theme(legend.title=element_blank(), legend.position="top")
# p3: Number x Quantity x Order
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Quantity","Order"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p3=ggplot(rts, aes(y=RT_RecSqt, x=Number, group=Quantity, ymin=mins, ymax=maxs, pch=Quantity)) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_grid(~Order, scale="free_x") +
theme_bw() +
theme(aspect.ratio=1) +
theme(legend.title=element_blank(), legend.position="top")
# p4: Vagueness x Quantity x Order
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Vagueness","Quantity","Order"))
rts$mins=rts$RT_RecSqt-rts$ci
rts$maxs=rts$RT_RecSqt+rts$ci
p4=ggplot(rts, aes(y=RT_RecSqt, x=Vagueness, group=Quantity, ymin=mins, ymax=maxs, pch=Quantity)) +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
facet_grid(~Order, scale="free_x") +
theme_bw() +
theme(aspect.ratio=1) +
theme(legend.title=element_blank(), legend.position="top")
# assemble the four panels in a 2x2 grid
grid.arrange(nrow=2, p1,p2,p3,p4)
Plot vagueness by number by quantity over items.
# Vagueness-by-Number-by-Quantity interaction, resolved over items.
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number","Vagueness","Item", "Quantity"))
# NOTE(review): condition is unused below — the plot maps Vagueness directly
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
dodge = position_dodge(width=0.2)
ggplot(rts, aes(y=RT_RecSqt, x=Item, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci,
group=Vagueness,
fill=Vagueness)) +
geom_line(position=dodge) +
geom_errorbar(width=.2, position=dodge) +
geom_point(pch=21, size=4, position=dodge) +
scale_fill_manual("",values=c("black","white")) +
ggtitle("Response time") +
# fixed axis-label typo: was "RT_Rec_Sqt", the variable is RT_RecSqt
ylab("RT_RecSqt") +
xlab("") +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background=element_rect(fill="white",color=1),
legend.key = element_blank(),
aspect.ratio=1, axis.text.x = element_text(angle = 45, hjust = 1)) +
facet_grid(~Number+Quantity)
4 way interaction
# Full 4-way interaction: Vagueness on x, Quantity as trace, Order x Number panels.
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number", "Vagueness", "Quantity", "Order"))
# NOTE(review): condition is unused in this plot — grouping is by Quantity
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
ggplot(rts, aes(y=RT_RecSqt, x=Vagueness, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci, group=Quantity, pch=Quantity)) +
facet_grid(~Order+Number, scale="free_x") +
geom_line() +
geom_errorbar(width=.1) +
geom_point(cex=3) +
theme_bw() +
theme(aspect.ratio=1) +
theme(legend.title=element_blank())
4 way interaction split over items
# 4-way interaction (Number * Vagueness * Quantity * Order) split over items.
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number", "Vagueness", "Item", "Quantity", "Order"))
# NOTE(review): condition is unused below — the plot maps Vagueness directly
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
ggplot(rts, aes(y=RT_RecSqt, x=Item, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci,
group=Vagueness,fill=Vagueness)) +
geom_line() +
geom_errorbar(width=.12) +
geom_point(pch=21, size=2) +
# Vagueness has two levels, so two fill values suffice (was 4, extras unused)
scale_fill_manual("",values=c("black","white")) +
ggtitle("Number * Vagueness * Quantity * Order") +
ylab("RT_RecSqt") +
xlab("") +
theme_bw() +
# removed duplicated axis.text.x argument (identical value passed twice to theme())
theme( legend.key = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1), aspect.ratio=1) +
facet_grid(Vagueness+Order~Number+Quantity)
4 way interaction split over items showing ratio of item dots.
# As the previous 4-way plot, but items are placed at their mean dot ratio
# (item_mean_ratio) on a continuous x axis rather than as discrete labels.
rts <- summarySEwithin(dd, measurevar="RT_RecSqt", withinvars=c("Number", "Vagueness", "item_mean_ratio", "Quantity", "Order"))
# NOTE(review): condition is unused below — the plot maps Vagueness directly
rts$condition=paste(rts$Number,rts$Vagueness, sep= ' ')
# recover numeric ratio (summarySEwithin factorised it)
rts$item_mean_ratio <- as.numeric(as.character(rts$item_mean_ratio))
#dodge = position_dodge(width=0.2)
ggplot(rts, aes(y=RT_RecSqt, x=item_mean_ratio, ymin=RT_RecSqt-ci, ymax=RT_RecSqt+ci,
group=Vagueness,fill=Vagueness)) +
# geom_line(position=dodge) +
geom_line() +
geom_errorbar(width=.02) +
geom_point(pch=21, size=2) +
# Vagueness has two levels, so two fill values suffice (was 4, extras unused)
scale_fill_manual("",values=c("black","white")) +
ggtitle("Number * Vagueness * Quantity * Order") +
ylab("RT_RecSqt") +
xlab("") +
theme_bw() +
# removed duplicated axis.text.x argument (identical value passed twice to theme())
theme( legend.key = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1), aspect.ratio=1) +
facet_grid(Vagueness+Order~Number+Quantity)
Pairs plot
pairs(dd[,c("RT_RecSqt", "Number", "Vagueness", "RTprev_RecSqt")], pch = 19, cex=.075)
Build the model
## do ols model
# define datadist: rms needs a datadist object to compute effect ranges
# for summary()/Predict() on the fitted model
dd.dd = datadist(dd)
options(datadist = "dd.dd")
# build model: centred design factors, the pre-specified 3-way interaction,
# item difficulty (mean dot ratio), trial (practice/fatigue), previous-trial
# RT, and instruction length. Commented terms record alternatives that were
# tried and dropped.
v4 <- ols(data=dd,
RT_RecSqt ~
c_Vag + c_Num + c_Qty + c_Ord +
c_Num:c_Vag:c_Qty+
item_mean_ratio +
#c_Vag:c_Num +
#c_Vag:c_Num:item_mean_ratio +
# pol(s_Trl, 3) +
s_Trl+
# pol(RTprev_RecSqt,3) +
RTprev_RecSqt +
nchar_instr
# cell+
# measurement,
,
# keep design matrix and response in the fit (needed by validate/which.influence);
# spelled TRUE rather than the reassignable shorthand T
x=TRUE, y=TRUE )
# print out p value for likelihood ratio with df
# lower.tail=FALSE is numerically more accurate than 1-pchisq() when the
# p value is extremely small (1-p underflows to 0)
cat("lik.ratio p value, whether the model as a whole is explanatory: p =", pchisq(v4$stats[2],v4$stats[3],lower.tail=FALSE))
## lik.ratio p value, whether the model as a whole is explanatory: p = 0
# print out R2 for the model
cat("R^2 = ", v4$stats[4])
## R^2 = 0.2898629
Show the model tables
# show model
# Printing the rms ols fit gives the likelihood-ratio test, R^2,
# and the coefficient table below.
v4
##
## Linear Regression Model
##
## ols(formula = RT_RecSqt ~ c_Vag + c_Num + c_Qty + c_Ord + c_Num:c_Vag:c_Qty +
## item_mean_ratio + s_Trl + RTprev_RecSqt + nchar_instr, data = dd,
## x = T, y = T)
## Model Likelihood Discrimination
## Ratio Test Indexes
## Obs 7677 LR chi2 2627.82 R2 0.290
## sigma 0.0062 d.f. 9 R2 adj 0.289
## d.f. 7667 Pr(> chi2) 0.0000 g 0.004
##
## Residuals
##
## Min 1Q Median 3Q Max
## -0.0209659 -0.0040909 0.0000796 0.0042312 0.0196345
##
## Coef S.E. t Pr(>|t|)
## Intercept 0.0314 0.0009 35.36 <0.0001
## c_Vag -0.0007 0.0001 -4.57 <0.0001
## c_Num 0.0043 0.0001 29.65 <0.0001
## c_Qty 0.0009 0.0001 6.26 <0.0001
## c_Ord -0.0004 0.0001 -2.57 0.0103
## item_mean_ratio -0.0078 0.0006 -12.75 <0.0001
## s_Trl 0.0007 0.0001 10.12 <0.0001
## RTprev_RecSqt 0.0030 0.0001 42.11 <0.0001
## nchar_instr -0.0001 0.0000 -2.79 0.0053
## c_Vag * c_Num * c_Qty -0.0011 0.0006 -1.93 0.0537
# show model summary
# summary.rms reports low-to-high (inter-quartile for continuous predictors)
# effects with 0.95 CIs, adjusted to the levels noted at the end of the output.
summary(v4)
## Effects Response : RT_RecSqt
##
## Factor Low High Diff. Effect S.E.
## c_Vag -0.50000 0.50000 1.000000 -0.00094483 2.0041e-04
## c_Num -0.50000 0.50000 1.000000 0.00400970 2.0586e-04
## c_Qty -0.50000 0.50000 1.000000 0.00060798 1.9988e-04
## c_Ord -0.50000 0.50000 1.000000 -0.00036185 1.4096e-04
## item_mean_ratio 0.68765 0.76916 0.081509 -0.00063266 4.9612e-05
## s_Trl -0.85921 0.87274 1.732000 0.00125240 1.2380e-04
## RTprev_RecSqt -0.61685 0.63201 1.248900 0.00375950 8.9289e-05
## nchar_instr 30.00000 36.00000 6.000000 -0.00040202 1.4411e-04
## Lower 0.95 Upper 0.95
## -0.00133770 -5.5197e-04
## 0.00360610 4.4132e-03
## 0.00021616 9.9980e-04
## -0.00063817 -8.5528e-05
## -0.00072991 -5.3541e-04
## 0.00100980 1.4951e-03
## 0.00358450 3.9346e-03
## -0.00068451 -1.1952e-04
##
## Adjusted to: c_Vag=-0.5 c_Num=-0.5 c_Qty=-0.5
# do anova
# anova.rms: partial-SS / Wald tests per predictor; an1 is kept so the
# Predict plot below can be annotated with these p values
an1 = anova(v4)
## Warning in anova.rms(v4): tests of nonlinear interaction with respect to single component
## variables ignore 3-way interactions
# show anova
an1
## Analysis of Variance Response: RT_RecSqt
##
## Factor d.f. Partial SS
## c_Vag (Factor+Higher Order Factors) 2 0.0009653558
## All Interactions 1 0.0001419055
## c_Num (Factor+Higher Order Factors) 2 0.0338247101
## All Interactions 1 0.0001419055
## c_Qty (Factor+Higher Order Factors) 2 0.0016309290
## All Interactions 1 0.0001419055
## c_Ord 1 0.0002512303
## item_mean_ratio 1 0.0061997569
## s_Trl 1 0.0039022370
## RTprev_RecSqt 1 0.0675901413
## nchar_instr 1 0.0002967003
## c_Vag * c_Num * c_Qty (Factor+Higher Order Factors) 1 0.0001419055
## REGRESSION 9 0.1193124785
## ERROR 7667 0.2923044952
## MS F P
## 4.826779e-04 12.66 <.0001
## 1.419055e-04 3.72 0.0537
## 1.691236e-02 443.60 <.0001
## 1.419055e-04 3.72 0.0537
## 8.154645e-04 21.39 <.0001
## 1.419055e-04 3.72 0.0537
## 2.512303e-04 6.59 0.0103
## 6.199757e-03 162.62 <.0001
## 3.902237e-03 102.35 <.0001
## 6.759014e-02 1772.86 <.0001
## 2.967003e-04 7.78 0.0053
## 1.419055e-04 3.72 0.0537
## 1.325694e-02 347.72 <.0001
## 3.812502e-05
plot ‘partial effects’
## plot partial effects
# Predicted values with confidence limits for every term in the model
p1 = Predict(v4)
# plot 'partial effects', annotated with the anova p values computed above
plot(p1, anova = an1, pval = TRUE, aspect = 1, main = "ols model v4")
# store the model's predictions alongside the observations
dd$RT_Predicted <- predict(v4)
Plot model coefficients and ci’s
# Dot plot of the v4 coefficients with confidence intervals.
# (Removed dead code that first computed y/n/y0/y1 *with* the intercept and
# then immediately overwrote all four with the intercept-free versions.)
par(las=2, mar=c(14,6,1,1))
y=coef(v4)[-1] # omit intercept
n=length(y)
y0=confint(v4)[-1,1]
y1=confint(v4)[-1,2]
plot(y, xaxt="n", xlab="", ylab="RT_RecSqt\n", pch=19, ylim=extendrange(y, f=.10), main="Speed")
abline(h=0)
# coefficient names on the x axis (las=2 above rotates them)
axis(1, labels=names(y) , at=1:n)
grid()
# confidence intervals as vertical segments through each point
segments(x0=1:n, x1=1:n, y0, y1, lwd=2)
Collinearity is assessed by the condition number \(k\).
The greater the collinearity, the closer the matrix of predictors is to becoming singular.
\(k\) estimates the extent to which a matrix is \(\text{singular}\)
\(0\ldots6\) no collinearity
\(\text{around } 15\) medium collinearity
\(>30\) indicates potentially harmful collinearity
# Condition number k of the predictor matrix (collin.fnc from languageR);
# per the notes above, k > 30 flags potentially harmful collinearity.
cat("k is:",
collin.fnc(dd[, c("s_Trl", "c_Num", "c_Vag", "c_Qty", "c_Ord", "item_mean_ratio",
"RTprev_RecSqt", "nchar_instr", "measurement")
])$cnumber
)
## k is: 38.03642
# Hierarchical clustering of the predictors (varclus, loaded via rms) to see
# which ones carry overlapping information.
par(mar = c(1.1, 3.1, 1.1, 1.1), pty = 's')
predictor_matrix <- as.matrix(dd[, c("s_Trl", "c_Num", "c_Vag", "c_Qty", "c_Ord",
                                     "item_mean_ratio", "RTprev_RecSqt",
                                     "nchar_instr", "measurement")])
plot(varclus(predictor_matrix))
Model criticism baayen plots
# Baayen 4-plot model criticism
par(mfrow=c(1,4), pty='s')
# create scaled residuals
# NOTE(review): this stashes an ad-hoc element inside the ols fit object v4;
# harmless for list access but not part of the rms fit structure
v4$rstand = as.vector(scale(resid(v4)))
# plot scaled residuals density
plot(density(v4$rstand), main='density of\nscaled residuals')
# plot sample quantiles versus theoretical quantiles
qqnorm(v4$rstand, cex=.5)
qqline(v4$rstand)
# plot standardised residuals versus fitted values
plot(v4$rstand ~ fitted(v4), pch='.', main='std resid\nv fitted')
# absolute standardised residuals greater than 2.5 are candidates
# for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))
# create dffits
dffits=abs(resid(v4, "dffits"))
# plot dffits
plot(dffits, type='h', main='dffits')
# here no overly influential
# which.influence (rms) lists, per predictor, observations whose removal
# changes the coefficient by more than `cutoff` dfbetas units
w = which.influence(fit = v4, cutoff = 0.4)
if (length(w)==0) {
print('There are no overly influential data points')
} else {
print('There are some overly influential variables:');
print(w)
}
## [1] "There are no overly influential data points"
Model validation
Validation tests overfitting using bootstrap.
Bootstrapping draws as a bootstrap sample the same number of samples as the original data, at random with replacement, from the original data.
Then fit the model to the data in the bootstrap sample, and use this model to predict the reaction times for the original full data (which contains many samples that are new to the bootstrap model).
Then compare the goodness of fit of the bootstrap model with the goodness of fit of the original model.
Averaged over a large number of bootstrap models these comparisons of goodness of fit reveal to what extent the original model overfits the data.
Function validate() in rms does this re-sampling validation.
# Bootstrap validation of v4 with backwards step-down variable selection
# (bw = T), estimating the optimism of R^2 and the calibration slope.
# argument B is the number of bootstrap runs
# argument pr is whether to print the results of each run
v_1 <- validate(v4, bw = T, B = 200, pr=FALSE, estimates=TRUE)
##
## Backwards Step-down - Original Model
##
## No Factors Deleted
##
## Factors in Final Model
##
## [1] c_Vag c_Num c_Qty
## [4] c_Ord item_mean_ratio s_Trl
## [7] RTprev_RecSqt nchar_instr c_Vag * c_Num * c_Qty
# in print.validate, B is the number of re-samples to show outcomes for each resample
print(v_1, B=0)
## index.orig training test optimism index.corrected n
## R-square 0.2899 0.2907 0.2887 0.0020 0.2878 200
## MSE 0.0000 0.0000 0.0000 0.0000 0.0000 200
## g 0.0045 0.0045 0.0045 0.0000 0.0044 200
## Intercept 0.0000 0.0000 0.0001 -0.0001 0.0001 200
## Slope 1.0000 1.0000 0.9956 0.0044 0.9956 200
# show how often each number of variables kept was observed across the 200 runs
xtabs(~rowSums(attr(v_1,"kept")))
## rowSums(attr(v_1, "kept"))
## 6 7 8 9
## 2 31 71 96
# reload the trial-level data frame dd
load("dd.Rda")
# Mixed-effects model: same fixed effects as the ols fit v4, plus by-subject
# random intercepts and random slopes for the four design factors
# (lmerTest supplies Satterthwaite p values in summary()).
v5 <- lmerTest::lmer(data=dd,
RT_RecSqt ~
c_Vag + c_Num + c_Qty + c_Ord +
c_Num:c_Vag:c_Qty +
item_mean_ratio +
s_Trl +
RTprev_RecSqt +
nchar_instr +
(1+c_Vag + c_Num + c_Qty + c_Ord|Subject)
)
summary(v5)
## Linear mixed model fit by REML t-tests use Satterthwaite approximations
## to degrees of freedom [lmerMod]
## Formula: RT_RecSqt ~ c_Vag + c_Num + c_Qty + c_Ord + c_Num:c_Vag:c_Qty +
## item_mean_ratio + s_Trl + RTprev_RecSqt + nchar_instr + (1 +
## c_Vag + c_Num + c_Qty + c_Ord | Subject)
## Data: dd
##
## REML criterion at convergence: -59041.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8627 -0.6682 0.0131 0.6682 3.7743
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## Subject (Intercept) 1.647e-05 0.0040589
## c_Vag 1.034e-07 0.0003215 0.26
## c_Num 9.648e-06 0.0031061 -0.41 -0.29
## c_Qty 1.089e-06 0.0010436 0.19 -0.10 -0.27
## c_Ord 2.145e-07 0.0004631 -0.15 0.31 -0.46 -0.68
## Residual 2.528e-05 0.0050278
## Number of obs: 7677, groups: Subject, 30
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.132e-02 1.035e-03 1.080e+02 30.268 < 2e-16 ***
## c_Vag -6.495e-04 1.331e-04 3.300e+01 -4.881 2.57e-05 ***
## c_Num 4.155e-03 5.792e-04 2.900e+01 7.174 6.61e-08 ***
## c_Qty 8.523e-04 2.225e-04 2.900e+01 3.831 0.000627 ***
## c_Ord -2.833e-04 1.426e-04 4.400e+01 -1.987 0.053149 .
## item_mean_ratio -7.953e-03 4.957e-04 7.558e+03 -16.045 < 2e-16 ***
## s_Trl 1.141e-03 5.867e-05 7.564e+03 19.443 < 2e-16 ***
## RTprev_RecSqt 5.765e-04 7.301e-05 7.618e+03 7.897 3.11e-15 ***
## nchar_instr -6.187e-05 1.956e-05 7.558e+03 -3.163 0.001566 **
## c_Vag:c_Num:c_Qty -1.146e-03 4.635e-04 7.558e+03 -2.472 0.013455 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) c_Vag c_Num c_Qty c_Ord itm_m_ s_Trl RTp_RS nchr_n
## c_Vag -0.068
## c_Num -0.260 -0.136
## c_Qty 0.106 -0.035 -0.225
## c_Ord -0.064 0.082 -0.264 -0.348
## item_men_rt -0.323 -0.004 0.001 0.000 0.000
## s_Trl 0.005 0.003 -0.001 -0.002 0.006 -0.017
## RTprv_RcSqt 0.002 -0.006 0.007 0.004 -0.016 0.011 -0.206
## nchar_instr -0.610 0.248 -0.044 0.017 0.000 -0.017 0.001 -0.008
## c_Vg:c_N:_Q 0.084 -0.034 0.006 -0.002 0.000 0.002 -0.001 0.004 -0.138
cat("R^2")
## R^2
# squared correlation of fitted and observed as a pseudo-R^2 for the mixed model
cor(fitted(v5), dd$RT_RecSqt)^2
## [1] 0.534303
# partial-effect plot per fixed effect (plotLMER.fnc from languageR)
par(mfrow=c(2,5))
plotLMER.fnc(v5)
## effect size (range) for c_Vag is 0.0003630759
## effect size (range) for c_Num is 0.003868485
## effect size (range) for c_Qty is 0.001138683
## effect size (range) for c_Ord is 0.0002832954
## effect size (range) for item_mean_ratio is 0.002418943
## effect size (range) for s_Trl is 0.003936032
## effect size (range) for RTprev_RecSqt is 0.003352036
## effect size (range) for nchar_instr is 0.0005568158
Plot model coefficients and ci’s
# Horizontal dot plot of the v5 fixed effects with Wald confidence intervals.
e=data.frame(coef=summary(v5)$coef[-1,1]) # estimates, without intercept
# Select CI rows by fixed-effect name rather than the fragile hard-coded
# index [18:26,] (which silently breaks if the random-effects structure,
# and hence the number of variance-parameter rows, changes).
q=as.data.frame(confint(v5, method='Wald')[names(fixef(v5))[-1],1:2])
eq=cbind(e,q)
# reverse so the first coefficient is plotted at the top
y=eq[rev(rownames(eq)),]
par(mar=c(4,10,1,1))
plot(y=1:nrow(y), x=y$coef, xlim=range(y$"2.5 %", y$"97.5 %"), type='n', axes=FALSE, xlab="", ylab="")
# CI segments, then the point estimates on top
segments(y0=1:nrow(y), y1=1:nrow(y), x0=y$"2.5 %", x1=y$"97.5 %", lwd=.55)
points(y=1:nrow(y), x=y$coef, pch=20, cex=.75)
# zero reference line
abline(v=0, lty=3, col='grey')
axis(2, labels=row.names(y), at=1:nrow(y), las=1)
axis(1, las=1)
mtext("estimate", side=1, line=2.5, cex.lab=1, las=1)
box(col='grey')
Model criticism baayen plots
# Baayen 4-plot model criticism for the mixed model v5
# (mirrors the criticism plots done for the ols fit v4 above)
par(mfrow=c(1,4), pty='s')
# create scaled residuals
dd$rstand = as.vector(scale(resid(v5)))
# plot scaled residuals density
plot(density(dd$rstand))
# plot sample quantiles versus theoretical quantiles
qqnorm(dd$rstand, cex=.5)
qqline(dd$rstand)
# plot standardised residuals versus fitted values
plot(dd$rstand ~ fitted(v5), pch='.')
# absolute standardised residuals greater than 2.5 are candidates for being outliers, the abline identifies them on the plot
abline(h=c(-2.5,2.5))